/*==================================================
project:       Cleans up SISBEN for use
Author:        David L. Vargas 
E-email:       davidvar@iadb.org
url:           
Dependencies:  
----------------------------------------------------
Creation Date:    22 Aug 2022 - 22:03:23
Modification Date:   
Do-file version:    01
References:          
Output:             
==================================================*/

 /*==================================================
               0: Program set up
 ==================================================*/
 *Written on STATA 17
 drop _all
 set varabbrev off	// no variable abbreviations allowed (personal preference)
 
 /*==================================================
               1: Construct GEIH
 ==================================================*/
 
 forv year = 2019/2021 {

	glo geih "${dir2r}/_aux/DANE/GEIH/`year'/Noviembre.dta"
	 

	/*GEIH is split by area and group of data*/
	loc areas "Área Cabecera Resto"
	loc kinds `" "Características generales (Personas)" "Vivienda y Hogares" "Fuerza de trabajo" "Desocupados" "Inactivos" "Ocupados" "Otras actividades y ayudas en la semana" "Otros ingresos" "'
	 
	*----------1.1.2: create an unique dataset
	 
	cd "${geih}"
	  
	if `year' == 2020 {
		// join by area (no area in 2020)
		// join by kind
		loc j = 0
		foreach k of local kinds {
			loc ++j 
			loc k2 = subinstr(lower("`k'")," ","_",.)
			loc k2 = substr("`k2'",1,15)
			loc filename `"`k'.dta"'
			if `j' == 1	use "`filename'", replace
			else{
				
				noi di "`filename'"	
				if ("`k'" == "Vivienda y Hogares")	loc keys "m:1 DIRECTORIO SECUENCIA_P"
				else								loc keys "1:1 DIRECTORIO SECUENCIA_P ORDEN"
				noi merge `keys' using "`filename'", gen(_m_`k2')
			}
		}

	}
	else {
		// join by area
		foreach a of local areas {
		
			// join by kind
			loc j = 0
			foreach k of local kinds {
				loc ++j 
				loc k2 = subinstr(lower("`k'")," ","_",.)
				loc k2 = substr("`k2'",1,15)
				loc filename `"`a' - `k'.dta"'
				if `j' == 1	use "`filename'", replace
				else{
					
					noi di "`filename'"	
					if ("`k'" == "Vivienda y Hogares")	loc keys "m:1 DIRECTORIO SECUENCIA_P"
					else								loc keys "1:1 DIRECTORIO SECUENCIA_P ORDEN"
					noi merge `keys' using "`filename'", gen(_m_`k2')
				}
			}
			
			tempfile `a'
			gen area_s = "`a'"
			save ``a'', replace	
		}
	
		// Append files 
		loc k = 0
		foreach a of local areas {
			loc ++k
			if `k' == 1	use ``a'', replace
			else		app using ``a''
		
		}
		
		// area is contained in cabecera, lets tag and drop 
		gen _aux = area_s == "Área"
		bys DIRECTORIO SECUENCIA_P : egen area_metro = max(_aux)
		drop if _aux == 1
		drop _aux

	}
	 
	destring, replace


	// add right expansion factor

	preserve 

	if 		`year' == 2019 	use "${dir2r}/_aux/DANE/GEIH/`year'/Fex proyeccion CNPV_2018.dta", clear
	else if `year' == 2020 	use "${dir2r}/_aux/DANE/GEIH/`year'/Fex proyeccion CNPV_2018_2020.dta", clear
	else if `year' == 2021 	use "${dir2r}/_aux/DANE/GEIH/`year'/Fex proyeccion CNPV_2018.dta", clear
	
	rename *, upper

	keep if MES == 11

	keep DIRECTORIO SECUENCIA_P ORDEN FEX_C18

	tempfile newweight
	save `newweight'

	restore 

	merge m:1 DIRECTORIO SECUENCIA_P ORDEN using `newweight', keep(3) nogen  
	
	tempfile geih
	save `geih', replace
	 
	 /*==================================================
				   2: Build income variables
	 ==================================================*/
	 
	 *---------- 1.1.3: Gen total income 
	 
	 use `geih', clear  

	 if `year' != 2020 loc areavar "area_s"
	 else 			   loc areavar ""
	 
	 if `year' == 2020 rename FEX_C fex_c_2011

	 // keep relevant income variables 
	 keep DIRECTORIO SECUENCIA_P ORDEN HOGAR `areavar' ///
	 	 P6050 /// jefe de hogar
		 P6500 /// INGRESO LABORAL (PRINCIPAL)
		 P6620S1 /// PAGOS NO MONETARIOS (LABORALES)
		 P6510S1 /// INGRESOS POR HORAS EXTRAS
		 P6510S2  /// DUMMY SI SUMO HORAS EXTRAS EN PREGUNTA INC LABORAL
		 P6585S1A1 /// SUBSIDIO ALIMENTARIO (EMPLEADOR)
		 P6585S1A2 /// DUMMY SI SUMO SUBSIDIO EN PREGUNTA INC LABORAL
		 P6585S2A1 /// SUBSIDIO TRANSPORTE
		 P6585S2A2 /// SUBSIDIO TRANSPORTE SUMO A SALARIO
		 P6585S3A1 /// SUBSIDIO FAMILIAR
		 P6585S3A2 /// SUBSIDIO FAMILIAR SUMO A SALARIO
		 P6585S4A1 /// SUBSIDIO EDUCATIVO
		 P6585S4A2 /// SUBSIDIO EDUCATIVO SUMO A SALARIO
		 P6630S4A1 /// VIATICOS PERMANENTES 
		 P6545S1 /// PRIMAS
		 P6545S2 /// SUMO LAS PRIMAS EN EN LA PREGUTNA DE INC LABORAL
		 P6630S1 /// RECIBIO PRIMA DE SERVICIOS EN EL ULTIMO AÑO
		 P6630S1A1 /// VALOR PRIMA 
		 P6630S2A1 /// PRIMA DE NAVIDAD 
		 P6630S3A1 /// PRIMA DE VACACIONES
		 P7070 /// INGRESO SEGUNDO TRABAJO
		 P7422S1 /// INGRESOS LABORALES NO OCUPADOS
		 P7472S1 /// INGRESOS LABORALES INACTIVOS
		 P7500S1A1 /// INGRESOS ARRIENDOS
		 P7500S2A1 /// PENSIONES
		 P7500S3A1 /// ALIMONIES 
		 P7510S1A1 /// REMESAS
		 P7510S2A1 /// REMESAS INT.
		 P7510S3A1 /// TRANSFERENCIAS ()
		 P7510S5A1 /// PAGO INTERESES CDTs
		 P7510S6A1 /// PAGO INTERESES CESANTIAS
		 P7510S7A1 /// DINERO DE OTRAS FUENTES (EXTRAORDIANRIOS)
		 P6580 /// Bonifiaciones 
		 P6580S1 /// cuanto bonificaciones
		 P6580S2 /// Incluyo bonificaciones
		 P6750 /// Ingreso neto negocios 
		 P6760 /// meses ganancia negocio 
		 P550 /// ganacia cosecha
		 fex_c_2011
		 
	// keep only the head	 
	keep if P6050 == 1 

	global incvars P6510S1 P6620S1 P6585S1A1 P6545S1 P6630S1A1 P6630S2A1 P6630S3A1 P7070 P7422S1 P7472S1 P7500S1A1 P6585S2A1 P6585S3A1 P6585S4A1 P7500S2A1 P7500S3A1 P7510S1A1 P7510S2A1 P7510S3A1 P7510S5A1 P7510S6A1 P7510S7A1 P6580S1 P6750 P550 P6630S4A1
		 
	 // clean variables
	 
	foreach v of varlist _all {
		cap replace `v' = . if `v' == -98
		cap replace `v' = . if `v' == 98
	}

	foreach v of varlist _all {
	cap replace `v' = 0 if `v' == .
	}


	*****  Constant prices nov 2019
	if `year' != 2019 {
		scalar pi_2020 = 0.985
 		scalar pi_2021 = 0.936

		foreach v of global incvars {
			// multiplier to adjust by inflation
			if (`year'==2020)			loc mult "* `=pi_2020' "
			if (`year'==2021)			loc mult "* `=pi_2021'"
			else 						loc mult ""
			
			replace `v' = (`v' `mult' )
		}	

	} 

	// Income business monthly 
	replace P6750 = P6750 / P6760
	replace P550 = P550 /12
	replace P6630S4A1 = P6630S4A1 / 12
	gen gov_income = P7510S3A1 / 12 // Question is related to trasnfers from gov + NGOs in a 12 month period
	replace P7510S1A1 = P7510S1A1 / 12
	replace P7510S2A1 = P7510S2A1 / 12

	replace P7510S5A1  = P7510S5A1 / 12 
	replace P7510S6A1  = P7510S6A1 / 12
	replace P7510S7A1  = P7510S7A1 / 12
	 
	// GET LABORAL INCOME NET OF EXTRA TIME AND OTHER BENEFITS
	replace P6500 = P6500 - P6510S1 if P6510S2 == 1 & !missing(P6510S2)	// horas extra
	replace P6500 = P6500 - P6585S1A1 if P6585S1A2 == 1 & !missing(P6585S1A2) // subsidio alimentario 
	replace P6500 = P6500 - P6585S2A1 if P6585S2A2 == 1 & !missing(P6585S2A2) // subsidio transporte 
	replace P6500 = P6500 - P6585S3A1 if P6585S3A2 == 1 & !missing(P6585S3A2) // subsidio familiar 
	replace P6500 = P6500 - P6585S4A1 if P6585S4A2 == 1 & !missing(P6585S4A2) // subsidio educativo 
	replace P6500 = P6500 - P6545S1 if P6545S2 == 1 & !missing(P6545S2) // primas
	replace P6500 = P6500 - P6580S1 if P6580S2 == 1 & !missing(P6580S2) // Bonificaciones
	replace P6500 = 0 if P6500 == .	 
	
	drop P6510S2 P6585S1A2 P6545S2
	
	replace gov_income = 0 if !missing(gov_income)
	
	// total income value
	
	** regular income
	egen inc1 = rowtotal(P6500 P6510S1 ///
							P7070 P7422S1 P7472S1 /// Otros ingresos laborales
							P6585S1A1 P6585S2A1 P6585S3A1 P6585S4A1 /// subsidios laborales 
							P6630S4A1 /// viaticos permanentes 
							P7500S1A1 P7500S2A1 P7500S3A1 /// pensiones, arriendos y alimonies 
							gov_income /// 
							P7510S1A1 P7510S2A1 /// /* Added remesas */
							P6750 P550  ) /* Added income businness)*/
	* P6620S1 // pagos no monetarios excluded							 
	*  // subsidio alimentario 
	gen inc1_nt = inc1 - gov_income // This is an odd question but close enough
	replace inc1_nt = 0 if inc1_nt < 0 & !missing(inc1_nt)
							 
	 ** total income 	 
	 egen inc1_nov`year' = rowtotal(P6500 P6510S1 P6620S1 P6585S1A1 P6545S1  ///
							 P6630S2A1 P6630S3A1 P7070 P7422S1 P7472S1 ///
							 P7500S1A1 P7500S2A1 P7500S3A1 P7510S1A1 P7510S2A1 P6580S1 P6750)
	 

	 // household income
	 collapse (sum) inc1_nov`year' inc1_nt inc1 gov_income P6500 P6620S1 P6510S1  P6585S1A1 P6545S1 P6630S1 ///
			 P6630S2A1 P6630S3A1 P7070 P7422S1 P7472S1 ///
			 P7500S1A1 P7500S2A1 P7500S3A1 P7510S1A1 P7510S2A1 ///
			 P7510S3A1 P7510S5A1 P7510S6A1 P7510S7A1 P6580S1 P6750 ///
			 , by(DIRECTORIO SECUENCIA_P HOGAR fex_c_2011)
			 
			 
	 sum inc1 [aw = fex_c_2011] 	// looks good
	 winsor2 inc1, c(15 98) s(_w)  	// Get rid of odd values
	 winsor2 inc1_nt, c(15 98) s(_w)  	// Get rid of odd values
			 
			 
	 // add percentiles 
	 xtile pctl_w = inc1_w [aw=fex_c_2011], nq(10)
	 
	 forv pct = 1/10 {
		sum inc1_w if pctl_w == `pct'
	 }
	 
	 xtile pctl = inc1 [aw=fex_c_2011], nq(10)
	 forv pct = 1/10 {
		sum inc1 if pctl == `pct'
	 }

	 tempfile inc
	 save `inc', replace 
	 
	 
	 /*==================================================
				   2: Further cleaning 
	 ==================================================*/
	 
	 use `geih', clear
	 
	 merge m:1 DIRECTORIO SECUENCIA_P HOGAR using `inc', keep(3) nogen 
	 
	 // number of household members 
	 bys DIRECTORIO SECUENCIA_P: egen n_members = max(ORDEN)

	 // keep only head
	 keep if P6050 == 1 
	 	
	 // sex
	 gen female_head = (P6020 == 2)
	 gen male_head = (P6020 == 1)
	 
	 // age 
	 * Month of birth 
	 gen mob =ym(P6030S3,P6030S1)
	 format mob %tm
	 
	 * age to nov20
	 gen age_head = (td(1nov2020) - dofm(mob))/365.25
	 
	 // education head
	 gen no_education	= P6210 == 1
	 gen pre_school		= P6210 == 2
	 gen primary 		= P6210 == 3
	 gen middleschool 	= P6210 == 4
	 gen highschool 	= P6210 == 5
	 gen university 	= P6210 == 6
	 
	 // hsize 
	 ** already created n_members
	 
	 // urban (ish)
	 cap rename CLASE clase
	 if `year' != 2020 gen urban = area_s == "Área"
	 else gen urban = clase == 1 & !missing(clase)
	 replace urban = 1 if clase == 1
	 
	 // employement
	 ** employed people are in the employee section 
	 gen works = (_m_ocupados == 3)
	 
	 // formal employement 
	 gen pension_fund = (P6920 == 1) // exact question 
	 replace pension_fund = . if P6920 == .
	  ** fix according other questions
	  foreach v in P7420S1 P7420S2 P1519 P7460 P6921 P6920 P6980S1 P6980S2{
	  	*tab `v', m nol
	  	cap 		 replace pension_fund = 1 if `v' ==1
	  	if (_rc)	 replace pension_fund = 1 if `v' ==1
	  }
	  
	  gen formal_work = works == 1 & pension_fund == 1
	  replace formal_work = . if pension_fund == .
	  
	 // assets index
	 
	  gen own_washing_machine = P5210S4  == 1
	  gen own_fridge = P5210S5  == 1
	  gen own_blender = P5210S6  == 1
	  gen own_stove = P5210S7  == 1
	  gen own_oven = P5210S8  == 1
	  gen own_microw = P5210S9  == 1
	  gen own_wathe = P5210S10 == 1 
	  gen own_tv = P5210S11 == 1 
	  *gen own_dvd = P5210S14 == 1 
	  gen own_sound = P5210S15 == 1 
	  gen own_pc = P5210S16 == 1 
	  gen own_vacum = P5210S17 == 1 
	  gen own_ac = P5210S18 == 1 
	  gen own_fan = P5210S19 == 1 
	  gen own_bike = P5210S20 == 1 
	  gen own_moto = P5210S21 == 1 
	  gen own_car = P5210S22 == 1 
	  gen own_real_state = P5210S24 == 1
	  gen own_house	=  inrange(P5090, 1, 2)
	 
	  // anderson index assett
	 loc z2 ""
	 foreach v in own_washing_machine own_fridge own_blender own_stove own_oven own_microw own_wathe own_tv own_sound own_pc own_vacum own_ac own_fan own_bike own_moto own_car own_real_state own_house {
		cap drop z2`v'
		sum `v' [aw=fex_c_2011]
		loc cmean = `r(mean)'
		loc sd 	  = `r(sd)'
		gen z2`v' = (`v'-`cmean')/(`sd')
		loc z2		 "`z2' z2`v'"
		lab var z2`v' "std `v'" 
	 }
	 
	 icw_index `z2' , gen(iassets)
	 drop `z2'
	 replace iassets = iassets * 100
	 lab var iassets "Index of assets"
	 
	 // gen percapita income
	 gen pp_inc = inc1
	 replace pp_inc = pp_inc/n_members
	 
	 gen pp_inc_nt = inc1_nt
	 replace pp_inc_nt = pp_inc_nt/n_members
	 
	 /*==================================================
				3: Final save
	 ==================================================*/
	 
	 save "${dir3r}/03_auxiliar/GEIH_`year'11_foruse.dta", replace

 } // End year loop
 
exit
/* End of do-file */

><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><

Notes:
1.
2.
3.


Version Control:


